-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[DirectX] ForwardHandle needs to check if globals were stored on allocas #151751
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Fixes llvm#140819. The SROA pass is making it so that some globals get loaded into stack allocations. This means we find an alloca where we used to expect a load, and now need to walk an alloca -> store -> maybe load chain before we find the global. Doing so fixes all but two instances of llvm#137715 and fixes every instance of `Load of "8.sroa.0" is not a global resource handle` we are currently seeing in the DML shaders.
@llvm/pr-subscribers-backend-directx Author: Farzon Lotfi (farzonl) Changes: Fixes #140819. The SROA pass is making it so that some globals get loaded into stack allocations. This means we find an alloca where we used to expect a load, and now need to walk an alloca -> store -> maybe load chain before we find the global. Doing so fixes all but two instances of #137715 and fixes every instance of `Load of "8.sroa.0" is not a global resource handle` we are currently seeing in the DML shaders. Full diff: https://github.com/llvm/llvm-project/pull/151751.diff 2 Files Affected:
diff --git a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
index 73abfe7c48584..747472b1a4bc1 100644
--- a/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
+++ b/llvm/lib/Target/DirectX/DXILForwardHandleAccesses.cpp
@@ -87,17 +87,48 @@ static bool forwardHandleAccesses(Function &F, DominatorTree &DT) {
for (LoadInst *LI : LoadsToProcess) {
Value *V = LI->getPointerOperand();
- auto *GV = dyn_cast<GlobalVariable>(LI->getPointerOperand());
+ auto *GV = dyn_cast<GlobalVariable>(V);
// If we didn't find the global, we may need to walk through a level of
// indirection. This generally happens at -O0.
- if (!GV)
+ if (!GV) {
if (auto *NestedLI = dyn_cast<LoadInst>(V)) {
BasicBlock::iterator BBI(NestedLI);
Value *Loaded = FindAvailableLoadedValue(
NestedLI, NestedLI->getParent(), BBI, 0, nullptr, nullptr);
GV = dyn_cast_or_null<GlobalVariable>(Loaded);
+ } else if (auto *NestedAlloca = dyn_cast<AllocaInst>(V)) {
+ for (auto &Use : NestedAlloca->uses()) {
+ auto *Store = dyn_cast<StoreInst>(Use.getUser());
+ if (!Store)
+ continue;
+
+ Value *StoredVal = Store->getValueOperand();
+ if (!StoredVal)
+ continue;
+
+ // Try direct global match
+ GV = dyn_cast<GlobalVariable>(StoredVal);
+ if (GV)
+ break;
+
+ // If it's a load, check its source
+ if (auto *Load = dyn_cast<LoadInst>(StoredVal)) {
+ GV = dyn_cast<GlobalVariable>(Load->getPointerOperand());
+ if (GV)
+ break;
+
+ // Try to find available loaded value
+ BasicBlock::iterator BBI(Load);
+ Value *Loaded = FindAvailableLoadedValue(Load, Load->getParent(),
+ BBI, 0, nullptr, nullptr);
+ GV = dyn_cast<GlobalVariable>(Loaded);
+ if (GV)
+ break;
+ }
+ }
}
+ }
auto It = HandleMap.find(GV);
if (It == HandleMap.end()) {
diff --git a/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
new file mode 100644
index 0000000000000..0c1ccb50caee7
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -dxil-forward-handle-accesses %s | FileCheck %s
+
+%"class.hlsl::RWStructuredBuffer" = type { target("dx.RawBuffer", i32, 1, 0) }
+@_ZL4dest = internal unnamed_addr global %"class.hlsl::RWStructuredBuffer" poison, align 4
+@.str = private unnamed_addr constant [5 x i8] c"dest\00", align 1
+
+
+; NOTE: the intent of this test is to confirm that the load of target("dx.RawBuffer", i32, 1, 0)
+; from the alloca is removed and the original handle is passed directly to @llvm.dx.resource.getpointer
+define void @CSMain() local_unnamed_addr {
+; CHECK-LABEL: define void @CSMain() local_unnamed_addr {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[AGG_TMP_I1_SROA_0:%.*]] = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+; CHECK-NEXT: [[TMP0:%.*]] = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @.str)
+; CHECK-NEXT: store target("dx.RawBuffer", i32, 1, 0) [[TMP0]], ptr @_ZL4dest, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = tail call i32 @llvm.dx.thread.id(i32 0)
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr @_ZL4dest, align 4
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull [[AGG_TMP_I1_SROA_0]])
+; CHECK-NEXT: store i32 [[TMP2]], ptr [[AGG_TMP_I1_SROA_0]], align 8
+; CHECK-NEXT: [[TMP3:%.*]] = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT: store i32 0, ptr [[TMP3]], align 4
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[AGG_TMP_I1_SROA_0]])
+; CHECK-NEXT: ret void
+;
+entry:
+ %agg.tmp.i1.sroa.0 = alloca target("dx.RawBuffer", i32, 1, 0), align 8
+ %0 = tail call target("dx.RawBuffer", i32, 1, 0) @llvm.dx.resource.handlefrombinding.tdx.RawBuffer_i32_1_0t(i32 0, i32 3, i32 1, i32 0, i1 false, ptr nonnull @.str)
+ store target("dx.RawBuffer", i32, 1, 0) %0, ptr @_ZL4dest, align 4
+ %1 = tail call i32 @llvm.dx.thread.id(i32 0)
+ %2 = load i32, ptr @_ZL4dest, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %agg.tmp.i1.sroa.0)
+ store i32 %2, ptr %agg.tmp.i1.sroa.0, align 8
+ %agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0. = load target("dx.RawBuffer", i32, 1, 0), ptr %agg.tmp.i1.sroa.0, align 8
+ %3 = tail call noundef nonnull align 4 dereferenceable(4) ptr @llvm.dx.resource.getpointer.p0.tdx.RawBuffer_i32_1_0t(target("dx.RawBuffer", i32, 1, 0) %agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0.agg.tmp.i1.sroa.0.0., i32 %1)
+ store i32 0, ptr %3, align 4
+ call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %agg.tmp.i1.sroa.0)
+ ret void
+}
|
LGTM. |
llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
Outdated
Show resolved
Hide resolved
llvm/test/CodeGen/DirectX/issue-140819_allow_forward_handle_on_alloca.ll
Outdated
Show resolved
Hide resolved
The failure is in compiler-rt, not in any of our code, so I am going to bypass the merge rule. |
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/190/builds/24994. Here is the relevant piece of the build log for reference:
|
LLVM Buildbot has detected a new failure on a builder. Full details are available at: https://lab.llvm.org/buildbot/#/builders/141/builds/10645. Here is the relevant piece of the build log for reference:
|
fixes #140819
The SROA pass is making it so that some globals get loaded into stack allocations. This means we find an alloca where we used to expect a load, and now need to walk an alloca -> store -> maybe load chain before we find the global. Doing so fixes all but two instances of #137715 and fixes every instance of `Load of "8.sroa.0" is not a global resource handle` we are currently seeing in the DML shaders.